ManyFaces Pilot Ratings Data Prep

Code
library(tidyverse)

Project Structure

Read in the full project structure from the project file to map experiment names to numbers.

Code
proj <- jsonlite::read_json("data/project_1136_structure.json")

exp_data <- purrr::map_df(proj, \(comp) {
  if (comp$component_type == "exp") {
     data.frame(
       exp_id = comp$id,
       name = comp$name,
       res_name = comp$res_name,
       instructions = comp$instructions,
       question = comp$question,
       exptype = comp$exptype,
       trial_order = comp$trial_order,
       total_stim = comp$total_stim,
       random_stim = comp$random_stim,
       trials = length(comp$trial),
       stim = length(comp$stim)
     )
  } else {
    NULL
  }
}) |>
  mutate(exp = sub("ManyFaces? Pilot Ratings: ", "", res_name) |> trimws())


trial_data <- purrr::map_df(proj, \(comp) {
  if (comp$component_type == "exp") {
    purrr::map_df(comp$trial, \(trial) {
      data.frame(
        exp_id = comp$id,
        n = trial$trial_n,
        name = trial$name, 
        img_id = trial$center_img,
        img_path = comp$stimuli[[as.character(trial$center_img)]]
      )
    })
  } else {
    NULL
  }
}) |>
  mutate(name = sub("^(manyfaces|attention_checks)/", "", name))
Code
exp_data |>
  select(exp_id, exp, question, trials) |>
  arrange(exp)

Data Prep

Raw Data

This workflow requires the data-raw directory, which is not shared on github.

This is the SQL for downloading the data from Experimentum. We need to download in chunks of 50000 rows to avoid file download limits on the site (not needed if downloading directly from SQL).

SELECT 
  session.id as session_id, project_id, exp.res_name as exp_name, exp_id, 
  session.user_id, user.sex as user_sex, user.status as user_status,
  ROUND(DATEDIFF(ed.dt, REPLACE(birthday, "-00","-01"))/365.25, 1) AS user_age,
  trial.name as trial_name,
  trial_n,
  `order`,
  dv,
  rt,
  ed.side,
  ed.dt
FROM session 
  LEFT JOIN user USING (user_id)
  LEFT JOIN exp_data AS ed ON ed.session_id = session.id
  LEFT JOIN exp ON exp.id = ed.exp_id
  LEFT JOIN trial USING (exp_id, trial_n)
WHERE session.project_id = 1136
  AND user.status IN ("guest", "registered")
  AND exp_id IN (1384, 1400, 1399, 1398, 1401, 1402, 1403, 
                 1404, 1405, 1397, 1390, 1389, 1388, 1387, 
                 1386, 1385, 1382, 1381, 1380, 1379, 1377)
LIMIT 50000
OFFSET 0
Code
# combine multiple downloads into one file
exp_raw <- list.files("data-raw/exp", full.names = TRUE) |> 
  read_csv(show_col_types = FALSE) |>
  unique() |>
  filter(user_status %in% c("guest", "registered"))

write_csv(exp_raw, paste0("data-raw/ManyFaces-Pilot-Ratings-exps_", Sys.Date(), ".csv"))

# get most recent files
exp_file <- list.files("data-raw", "ManyFaces-Pilot-Ratings-exps",
                       full.names = TRUE) |>
  sort(decreasing = TRUE) |>
  pluck(1)

exp_raw <- read_csv(exp_file, show_col_types = FALSE) |>
  filter(user_status %in% c("guest", "registered")) |>
  unique()

# get most recent files
quest_file <- list.files("data-raw", "ManyFaces-Pilot-Ratings-quests", 
                       full.names = TRUE) |>
  sort(decreasing = TRUE) |>
  pluck(1)

quest_raw <- read_csv(quest_file, show_col_types = FALSE) |>
  filter(user_status %in% c("guest", "registered")) |>
  unique()

# write to the data directory
write_csv(exp_raw, "data/manyfaces-pilot-exp.csv")
write_csv(quest_raw, "data/manyfaces-pilot-quest.csv")

Reshape Data

Code
exp_raw <- read_csv("data/manyfaces-pilot-exp.csv", show_col_types = FALSE)
quest_raw <- read_csv("data/manyfaces-pilot-quest.csv", show_col_types = FALSE)
Code
ed <- exp_data |> select(exp_id, exp)

exp_long <- exp_raw |>
  select(session_id, exp_id, trial_name, dv, rt, dt) |>
  unique() |>
  mutate(trial_name = sub("^(manyfaces|attention_checks)/", "", trial_name)) |>
  left_join(ed, by = "exp_id")

Exclusions

Investigate distribution of RTs

Code
ggplot(exp_long, aes(x = rt)) +
  geom_histogram(bins = 100) +
  scale_x_log10(breaks = 10^(1:6),
                labels = c("10ms", "100ms", "1s", "10s", "100s", "1000s"))

And median RTs for peole who did at least 100 trials.

Code
med_rt_100 <- exp_long |>
  summarise(med_rt = median(rt), 
            n = n(),
            .by = session_id) |>
  filter(n > 100)

ggplot(med_rt_100, aes(x = med_rt)) +
  geom_histogram(bins = 100) +
  scale_x_continuous(breaks = seq(0, 10000, 500)) +
  labs(x = "Median Reaction Time (ms)")

Set RT median cutoff at 1% quantile?

Code
rt_cutoff <- quantile(med_rt_100$med_rt, probs = 0.01)
Code
# Calculate median RT and number of trials completed

ed <- select(exp_data, exp_id, trials)

rt <- exp_long |>
  summarise(start = min(dt),
            median_rt = median(rt), 
            mfvp = (table(dv) |> sort() |> tail(1) |> as.vector())/n(),
            n = n(),
            .by = c(session_id, exp_id)) |>
  left_join(ed, by = "exp_id")
Code
# Check the attention checks
checks <- exp_long |>
  select(session_id:dv) |>
  filter(grepl("check", trial_name)) |>
  mutate(check_type = sub("check_[a-z0-9-]+_", "", trial_name),
         check_type = ifelse(exp_id == 1400, substr(check_type, 4, 6), check_type)) |>
  summarise(checks_passed = mean(check_type == dv), 
            .by = c("session_id", "exp_id"))
Code
# Check try question
try <- quest_raw |>
  filter(q_name == "try") |>
  select(session_id, try = dv) |>
  unique()
Code
# combine to determine who gets excluded
start_date <- "2025-05-06"

to_exclude <- rt |>
  left_join(checks, by = c("session_id", "exp_id")) |>
  left_join(try, by = "session_id") |>
  filter(
    n != trials |
    interval(start, start_date) |> as.numeric("days") > 0 |
    is.na(checks_passed) |
    checks_passed < 5/7 |
    mfvp > 0.9 |
    median_rt < rt_cutoff |
    try != 2  
  ) 
Code
to_exclude |>
  mutate(reason = case_when(
    n < trials ~ "did not finish trials",
    n > trials ~ "too many trials",
    interval(start, start_date) |> as.numeric("days") > 0 ~ "date",
    checks_passed < 5/7 ~ "passed < 5/7 checks",
    mfvp > .9 ~ ">90% same response",
    median_rt < rt_cutoff ~ paste0("median RT < ", round(rt_cutoff)),
    try == 1 ~ "did not try",
    .default = "other")) |>
  count(reason, sort = TRUE)

Exclude 234 people and remove attention checks

Code
exp <- anti_join(exp_long, to_exclude, by = c("session_id", "exp_id")) |>
  filter(!grepl("check_", trial_name))
Code
endtimes <- quest_raw |>
  summarise(end = max(endtime), .by = c(session_id))

times <- exp |>
  summarise(start = min(dt), .by = c(session_id)) |>
  left_join(endtimes, by = "session_id") |>
  mutate(duration = interval(start, end) |> as.numeric("minutes"))

Number of remaining participants per study

There are 1922 included participants.

Code
exp |>
  summarise(.by = c(exp, session_id)) |>
  count(exp)

Demographics

Code
quest <- anti_join(quest_raw, to_exclude, by = c("session_id")) |>
  select(session_id, q_name, dv, endtime) |>
  unique() |>
  pivot_wider(names_from = q_name, values_from = dv) |>
  mutate(age = as.integer(age))

Age and Gender

Code
ggplot(quest, aes(x = age, fill = gender)) +
  geom_histogram(binwidth = 1) +
  scale_fill_manual(values = c("hotpink", "lightblue", "orchid"))

Residence

Code
count(quest, residence, sort = TRUE)

Ethnicity

Code
quest |>
  mutate(ethnicity = tolower(ethnicity)) |>
  count(ethnicity, sort = TRUE)

Devices

Code
count(quest, device, sort = TRUE)

Plots

Code
rainbow <- c("firebrick", "darkorange", "goldenrod", "darkgreen", "dodgerblue3", "darkorchid4")

Standardised Neutral Ratings

Code
exp_levels <- c("attractive", "trustworthy", "dominant", 
                "memorable", "gender-typical")

exp |>
  filter(exp_id %in% 1377:1382) |>
  mutate(dv = as.integer(dv),
         exp = factor(exp, exp_levels)) |>
  ggplot(aes(x = dv, fill = exp)) +
  geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
  facet_wrap(~exp, ncol = 3, axes = "all_x", drop = FALSE) +
  labs(title = "Standardised Neutral Ratings",
       x = "") +
  scale_x_continuous(breaks = 1:7) +
  scale_fill_manual(values = rainbow, drop = FALSE)

Code
# function to create heatmap visualisations
heatmap <- function(id, label) {
  exp |>
    filter(exp %in% id) |>
    separate(trial_name, c("lab", "id"), extra = "drop") |>
    count(lab, id, dv) |>
    ggplot(aes(x = dv, y = id, fill = n)) +
    geom_tile() +
    facet_wrap(~lab) +
    scale_fill_viridis_c() +
    labs(x = label, y = NULL, 
         title = paste(label, "Ratings")) +
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 90))
}

Attractiveness

Code
heatmap("attractive", "Attractiveness")

Trustworthiness

Code
heatmap("trustworthy", "Trustworthiness")

Dominance

Code
heatmap("dominant", "Dominance")

Memorableness

Code
heatmap("memorable", "Memorableness")

Gender Typicality

Code
heatmap("gender-typical", "Gender Typicality")

Unstandardised Neutral Ratings

Code
exp_labels <- c("attractive", "trustworthy", "dominant")
exp_levels <- paste(exp_labels, "(unstd)")

exp |>
  filter(exp_id %in% 1397:1399) |>
  mutate(dv = as.integer(dv), 
         exp = factor(exp, exp_levels, exp_labels)) |>
  ggplot(aes(x = dv, fill = exp)) +
  geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
  facet_wrap(~exp, ncol = 5, drop = FALSE) +
  labs(title = "Unstandardised Neutral Ratings",
       x = "") +
  scale_x_continuous(breaks = 1:7) +
  scale_fill_manual(values = rainbow, drop = FALSE)

Attractiveness (Unstandardised)

Code
heatmap("attractive (unstd)", "Attractiveness (Unstandardised)")

Trustworthiness (Unstandardised)

Code
heatmap("trustworthy (unstd)", "Trustworthiness (Unstandardised)")

Dominance (Unstandardised)

Code
heatmap("dominant (unstd)", "Dominance (Unstandardised)")

Emotion Ratings

Code
dv_levels <- c("anger", "disgust", "fear", 
                "happiness", "sadness", "surprise", "other")
emo_levels <- c("ang", "dis", "fea", "hap", "sad", "sur")
emo_labels <- paste(dv_levels[1:6], "faces")

exp |>
  filter(exp_id %in% c(1384, 1401:1405)) |>
  separate(trial_name, c("lab", "model", "type", "emo", "view")) |>
  mutate(dv = factor(dv, dv_levels),
         emo = factor(emo, emo_levels, emo_labels)) |>
  ggplot(aes(x = dv, fill = dv)) +
  geom_point(aes(x = x, colour = I(fill), fill = I(fill)), 
             data.frame(emo = factor(emo_levels, emo_levels, emo_labels),
                        x = 1:6,
                        fill = rainbow),
             size = 6.5, y = -60, shape = 18, show.legend = FALSE) +
  geom_bar(color = "transparent") +
  facet_wrap(~emo, axes = "all_x", drop = FALSE) +
  scale_x_discrete(labels = c("A", "D", "F", "H", "S", "U", "O")) +
  scale_fill_manual(values = c(rainbow, "grey"), drop = FALSE) +
  labs(title = "Emotion Ratings",
       x = "",
       fill = "Rated Emotion") +
  coord_cartesian(clip="off") +
  theme(axis.ticks.x = element_blank())

Emotion Intensity Ratings

Code
exp_levels <- c("anger", "disgust", "fear", 
                "happiness", "sadness", "surprise")
               
exp |>
  filter(exp_id %in% 1385:1390) |>
  mutate(dv = as.integer(dv),
         exp = factor(exp, exp_levels)) |>
  ggplot(aes(x = dv, fill = exp)) +
  geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
  facet_wrap(~exp, ncol = 3, axes = "all_x", drop = FALSE) +
  labs(title = "Emotion Intensity Ratings",
       x = "") +
  scale_fill_manual(values = rainbow, drop = FALSE) +
  scale_x_continuous(breaks = 1:7)

Age Ratings

Code
dv_levels <- seq(20, 85, 5)
dv_labels <- paste(dv_levels-4, "-", dv_levels )
dv_labels[14] <- "81+"

exp |>
  filter(exp_id %in% 1400) |>
  mutate(dv = factor(dv, dv_levels, dv_labels)) |>
  ggplot(aes(x = dv)) +
  geom_bar(color = "black", fill = "white") +
  scale_x_discrete(drop = FALSE) +
  labs(title = "Age Ratings",
       x = "")

Code
# exp |>
#   filter(exp_id %in% 1400) |>
#   mutate(dv = as.numeric(dv) - 2.5) |>
#   summarise(age = mean(dv), age_sd = sd(dv), .by = trial_name)
Code
exp |>
  filter(exp_id %in% 1400) |>
  mutate(dv = factor(dv, dv_levels, dv_labels)) |>
  mutate(trial_name = gsub("_std_neu_0", "", trial_name)) |>
  separate(trial_name, c("lab", "id")) |>
  count(lab, id, dv) |>
  ggplot(aes(x = dv, y = id, fill = n)) +
  geom_tile() +
  facet_wrap(~lab) +
  scale_fill_viridis_c() +
  labs(x = "Age", y = NULL) +
  theme(legend.position = "none", 
        axis.text.x = element_text(angle = 90))